﻿using System.Net;
using System.Text;
using System.Text.RegularExpressions;

namespace project1
{
    /// <summary>
    /// Console entry point: downloads a web page and prints every anchor
    /// (<c>&lt;a ...&gt;...&lt;/a&gt;</c>) element found in its HTML, followed by the total count.
    /// </summary>
    internal class Program
    {
        /// <summary>Address of the page whose anchors are listed.</summary>
        private const string TargetUrl = "https://hao.360.com/?a1004";

        /// <summary>Request timeout, in milliseconds, applied to the page download.</summary>
        private const int RequestTimeoutMilliseconds = 19999;

        /// <summary>
        /// Matches a single anchor element that starts and ends on the same line.
        /// <c>\b</c> keeps tags such as <c>&lt;abbr&gt;</c> from matching, <c>[^&gt;]*</c> stops at the
        /// end of the opening tag, and the lazy <c>(.*?)</c> stops at the first closing tag so that
        /// several anchors on one line are reported individually instead of as one long match.
        /// </summary>
        private static readonly Regex AnchorRegex =
            new Regex(@"<a\b[^>]*>(.*?)</a>", RegexOptions.ECMAScript);

        /// <summary>
        /// Downloads <see cref="TargetUrl"/>, writes each anchor element to the console,
        /// then writes the number of anchors found and waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Fetch the complete HTML of the page in one go.
            string httpcontent = GetContent(TargetUrl);

            MatchCollection matchs = AnchorRegex.Matches(httpcontent);
            foreach (Match item in matchs)
            {
                // Value is the whole matched element, including the opening and closing tags.
                Console.WriteLine(item.Value);
            }

            Console.WriteLine("一共爬取到的项数为：");
            Console.WriteLine(matchs.Count);

            // Keep the console window open until the user presses a key.
            Console.ReadKey();
        }

        /// <summary>
        /// Performs an HTTP GET request and returns the response body decoded as UTF-8.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <returns>The full response body as a string.</returns>
        /// <exception cref="WebException">
        /// The request failed or timed out (raised by <see cref="HttpWebRequest.GetResponse"/>).
        /// </exception>
        public static string GetContent(string url)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.Method = "GET";
            request.Timeout = RequestTimeoutMilliseconds;

            // using declarations release the reader and the response (in that order)
            // even when reading the body throws.
            using HttpWebResponse response = (HttpWebResponse)request.GetResponse();
            using StreamReader streamreader = new StreamReader(response.GetResponseStream(), Encoding.UTF8);

            return streamreader.ReadToEnd();
        }
    }
}